# Replication File for "Combining List Experiment and Direct Question Estimates of Sensitive Behavior Prevalence"
# Peter M. Aronow, Alexander Coppock, Forrest W. Crawford, Donald P. Green
# Main Experiment Analysis

rm(list=ls())
library(xtable)

# Set your working directory to ACCG_list_replication_archive
setwd("/Users/Alex/Documents/Dropbox/Columbia/Collaboration/Work for Don/list experiments/ACCG_list_replication_archive/")

### Bring in functions and data
source("programs/ACCG_list_source.R")
main_exp <- read.csv("data/ACCG_main_experiment.csv")

# Complete Case Analysis, removes 9 units with incomplete answers
main_exp <- subset(main_exp, !is.na(direct1)&!is.na(direct2)&!is.na(direct3)&!is.na(direct4)&!is.na(direct5)&
                        !is.na(list1N)&!is.na(list2N)&!is.na(list3N)&!is.na(list4N)&!is.na(list5N)) 

# Table 1: Number of Subjects in Each Treatment Condition
treatment_ns<- with(main_exp,
rbind(
  cbind( table(directsfirst, directsfirst),table(directsfirst, list1treat),table(directsfirst, list2treat),table(directsfirst, list3treat),table(directsfirst, list4treat),table(directsfirst, list5treat)),    
  cbind( matrix(NA, 2, 2) ,table(list1treat, list1treat),table(list1treat, list2treat),table(list1treat, list3treat),table(list1treat, list4treat),table(list1treat, list5treat)),  
  cbind( matrix(NA, 2, 2) ,matrix(NA, 2, 2) ,table(list2treat, list2treat),table(list2treat, list3treat),table(list2treat, list4treat),table(list2treat, list5treat)),  
  cbind( matrix(NA, 2, 2) ,matrix(NA, 2, 2) ,matrix(NA, 2, 2) ,table(list3treat, list3treat),table(list3treat, list4treat),table(list3treat, list5treat)),  
  cbind( matrix(NA, 2, 2) ,matrix(NA, 2, 2) ,matrix(NA, 2, 2) ,matrix(NA, 2, 2) ,table(list4treat, list4treat),table(list4treat, list5treat)),
  cbind( matrix(NA, 2, 2) ,matrix(NA, 2, 2) ,matrix(NA, 2, 2) ,matrix(NA, 2, 2) ,matrix(NA, 2, 2) ,table(list5treat, list5treat))
  )
)  

treatment_ns_ordered <-  treatment_ns[,c(2,1,4,3,6,5,8,7,10,9, 12, 11)]
treatment_ns_ordered <-  treatment_ns_ordered[c(2,1,4,3,6,5,8,7,10,9,12,11),]
xtable(treatment_ns_ordered, caption="Number of Subjects in each Treatment")

# Table 2: Covariate Balance: List Experiment 1
chi_age <- chisq.test(table(main_exp$age, main_exp$firstbyone), simulate.p.value=T, B=10000)
chi_gender <- chisq.test(table(main_exp$gender, main_exp$firstbyone), simulate.p.value=T, B=10000)
chi_ideology <- chisq.test(table(main_exp$ideology, main_exp$firstbyone), simulate.p.value=T, B=10000)
chi_education <- chisq.test(table(main_exp$education, main_exp$firstbyone), simulate.p.value=T, B=10000)
chi_race <- chisq.test(table(main_exp$race, main_exp$firstbyone), simulate.p.value=T, B=10000)

age_table <- round(prop.table(table(main_exp$age, main_exp$firstbyone), margin =2)*100 ,digits=2)
age_table <- rbind(age_table, c(paste0("p=", round(chi_age$p.value, 2)), NA, paste0("chi^2=", round(chi_age$statistic, 2)), NA)) 

gender_table <- round(prop.table(table(main_exp$gender, main_exp$firstbyone), margin =2)*100 ,digits=2)
gender_table <- rbind(gender_table, c(paste0("p=", round(chi_gender$p.value, 2)), NA, paste0("chi^2=", round(chi_gender$statistic, 2)), NA)) 

ideology_table <- round(prop.table(table(main_exp$ideology, main_exp$firstbyone), margin =2)*100 ,digits=2)
ideology_table <- ideology_table[c(3,4,1,2),]
ideology_table <- rbind(ideology_table, c(paste0("p=", round(chi_ideology$p.value, 2)), NA, paste0("chi^2=", round(chi_ideology$statistic, 2)), NA)) 

education_table <- round(prop.table(table(main_exp$education, main_exp$firstbyone), margin =2)*100 ,digits=2)
education_table <- education_table[c(4,3,5,1,2),]
education_table <- rbind(education_table, c(paste0("p=", round(chi_education$p.value, 2)), NA, paste0("chi^2=", round(chi_education$statistic, 2)), NA)) 

race_table <- round(prop.table(table(main_exp$race, main_exp$firstbyone), margin =2)*100 ,digits=2)
race_table <- race_table[c(7,1,2,3,4,5,6),]
race_table <- rbind(race_table, c(paste0("p=", round(chi_race$p.value, 2)), NA, paste0("chi^2=", round(chi_race$statistic, 2)), NA)) 

treatment_group_ns <- round(table(main_exp$firstbyone), digits=0)
covariates_table <- rbind(age_table, gender_table, ideology_table, education_table, race_table, treatment_group_ns)
rownames(covariates_table)[c(which(rownames(covariates_table)==""))] <- 1:5

covariates_table <- covariates_table[,c(4, 2, 3, 1)]
colnames(covariates_table) <- c("1st x L1:T","1st x L1:C","2nd x L1:T","2nd x L1:C")
xtable(covariates_table, digits=2)

# 1: nuclear power
# 2: public transportation
# 3: Spanish-speaking
# 4: muslims
# 5: CNN

# V = List response
# Z = Treatment Assignment
# Y = Direct Question Response

studyA<- subset(main_exp, listsfirst==0)
studyB<- subset(main_exp, listsfirst==1)

# Table 3: Study A (Directs First): Three Estimates of Prevalence
studyA_results <- with(studyA,
                       rbind(
                        whole_shebang(list1N, list1treat, direct1),
                        whole_shebang(list2N, list2treat, direct2),
                        whole_shebang(list3N, list3treat, direct3),
                        whole_shebang(list4N, list4treat, direct4),
                        whole_shebang(list5N, list5treat, direct5)
))

rownames(studyA_results) <- c("Nuclear Power","Public Transportation", "Spanish-speaking", "Muslim Teachers", "CNN")
colnames(studyA_results) <- c("Direct Question", "Direct SE", "Conventional Estimate", "Conventional SE", "New Estimate", "New SE", "Variance Improvement")
xtable(studyA_results, digits=c(3,3,3,3,3,3,3,1))

# Table 4: Study A (Directs First): Placebo Test I

placebo_I_table <- with(studyA, 
                         rbind(placeboI_test(V = list1N, Z = list1treat, Y = direct1),
                               placeboI_test(V = list2N, Z = list2treat, Y = direct2),
                               placeboI_test(V = list3N, Z = list3treat, Y = direct3),
                               placeboI_test(V = list4N, Z = list4treat, Y = direct4),
                               placeboI_test(V = list5N, Z = list5treat, Y = direct5)))

colnames(placebo_I_table) <- c("Estimate","SE","Pr($\beta=0$)", "N")
rownames(placebo_I_table) <- c("Nuclear Power","Public Transportation", "Spanish-speaking", "Muslim Teachers", "CNN")
xtable(placebo_I_table, caption="Study A: Placebo Tests", digits=3, display=c("f", "f", "f", "f", "d"))

# Table 5: Study A (Directs First): Placebo Test II

placebo_II_table <- with(studyA, 
                         rbind(placeboII_test(Z = list1treat, Y = direct1),
                               placeboII_test(Z = list2treat, Y = direct2),
                               placeboII_test(Z = list3treat, Y = direct3),
                               placeboII_test(Z = list4treat, Y = direct4),
                               placeboII_test(Z = list5treat, Y = direct5)))

rownames(placebo_II_table) <- c("Nuclear Power","Public Transportation", "Spanish-speaking", "Muslim Teachers", "CNN")
colnames(placebo_II_table) <- c("Estimate", "SE", "p-value", "n")

placebo_II_xtable <- xtable(placebo_II_table,digits=3, display=c("f", "f", "f", "f", "d"))
placebo_II_xtable

# Table 6: Study B (Lists First): Three Estimates of Prevalence
studyB_results <- with(studyB,
                       rbind(
                         whole_shebang(list1N, list1treat, direct1),
                         whole_shebang(list2N, list2treat, direct2),
                         whole_shebang(list3N, list3treat, direct3),
                         whole_shebang(list4N, list4treat, direct4),
                         whole_shebang(list5N, list5treat, direct5)
                       ))

rownames(studyB_results) <- c("Nuclear Power","Public Transportation", "Spanish-speaking", "Muslim Teachers", "CNN")
colnames(studyB_results) <- c("Direct Question", "Direct SE", "Conventional Estimate", "Conventional SE", "New Estimate", "New SE", "Variance Improvement")
xtable(studyB_results, digits=c(3,3,3,3,3,3,3,1))

# Table 7: Study B (Lists First): Placebo Test I
placebo_I_table <- with(studyB, 
                        rbind(placeboI_test(V = list1N, Z = list1treat, Y = direct1),
                              placeboI_test(V = list2N, Z = list2treat, Y = direct2),
                              placeboI_test(V = list3N, Z = list3treat, Y = direct3),
                              placeboI_test(V = list4N, Z = list4treat, Y = direct4),
                              placeboI_test(V = list5N, Z = list5treat, Y = direct5)))

colnames(placebo_I_table) <- c("Estimate","SE","Pr($\beta=0$)", "N")
rownames(placebo_I_table) <- c("Nuclear Power","Public Transportation", "Spanish-speaking", "Muslim Teachers", "CNN")
xtable(placebo_I_table, caption="Study B: Placebo Tests", digits=3, display=c("f", "f", "f", "f", "d"))

# Table 8: Study B (Lists First): Placebo Test II

placebo_II_table <- with(studyB, 
                         rbind(placeboII_test(Z = list1treat, Y = direct1),
                               placeboII_test(Z = list2treat, Y = direct2),
                               placeboII_test(Z = list3treat, Y = direct3),
                               placeboII_test(Z = list4treat, Y = direct4),
                               placeboII_test(Z = list5treat, Y = direct5)))

rownames(placebo_II_table) <- c("Nuclear Power","Public Transportation", "Spanish-speaking", "Muslim Teachers", "CNN")
colnames(placebo_II_table) <- c("Estimate", "SE", "p-value", "n")

placebo_II_xtable <- xtable(placebo_II_table,digits=3, display=c("f", "f", "f", "f", "d"))
placebo_II_xtable

# Table A1

studyA_results <- with(studyA,
                       rbind(
                         whole_shebang(list1N, list1treat, direct1),
                         whole_shebang(list2N, list2treat, direct2),
                         whole_shebang(list3N, list3treat, direct3),
                         whole_shebang(list4N, list4treat, direct4),
                         whole_shebang(list5N, list5treat, direct5)
                       ))

studyB_results <- with(studyB,
                       rbind(
                         whole_shebang(list1N, list1treat, direct1),
                         whole_shebang(list2N, list2treat, direct2),
                         whole_shebang(list3N, list3treat, direct3),
                         whole_shebang(list4N, list4treat, direct4),
                         whole_shebang(list5N, list5treat, direct5)
                       ))

factorial_design_table <- studyA_results[,1:6] - studyB_results[,1:6]
factorial_design_table[,c(2,4,6)] <- sqrt(studyA_results[,c(2,4,6)]^2 + studyB_results[,c(2,4,6)]^2)

rownames(factorial_design_table) <- c("Nuclear Power","Public Transportation", "Spanish-speaking", "Muslim Teachers", "CNN")
colnames(factorial_design_table) <- rep(c("Difference", "SE"), 3)

xtable(factorial_design_table,digits=3)


# Figure 1: Study A (Directs First): Three Estiamtes of Prevalance

means <- c(studyA_results[1, c(1, 3, 5)],NA, studyA_results[2, c(1, 3, 5)],NA,studyA_results[3, c(1, 3, 5)],NA,studyA_results[4, c(1, 3, 5)],NA,studyA_results[5, c(1, 3, 5)])
ses <- c(studyA_results[1, c(2, 4, 6)],NA, studyA_results[2, c(2, 4, 6)],NA,studyA_results[3, c(2, 4, 6)],NA,studyA_results[4, c(2, 4, 6)],NA,studyA_results[5, c(2, 4, 6)])
errs <- ses*1.96

pdf("StudyA.pdf", height=7, width= 10)
gray_levels= c(.4, .6, .8, 0)
comp <- barplot(means, ylim=c(0,1), axisnames=F, col=gray(gray_levels), ylab="Prevalence")
arrows(comp,means-errs ,comp,means+errs, code=3, angle=90, length=.1)
legend("topright", bty= "n", c("Direct Estimate", "Standard List Estimate", "Combined Estimate"), cex=1, fill=gray(gray_levels))
axis(1, at= comp[c(2,6,10,14, 18)], labels= c("Nuclear Power","Public Transportation", "Spanish-speaking", "Muslim Teachers", "CNN"), tick=F)
dev.off()

# Figure 2: Study B (Lists First): Three Estiamtes of Prevalance

means <- c(studyB_results[1, c(1, 3, 5)],NA, studyB_results[2, c(1, 3, 5)],NA,studyB_results[3, c(1, 3, 5)],NA,studyB_results[4, c(1, 3, 5)],NA,studyB_results[5, c(1, 3, 5)])
ses <- c(studyB_results[1, c(2, 4, 6)],NA, studyB_results[2, c(2, 4, 6)],NA,studyB_results[3, c(2, 4, 6)],NA,studyB_results[4, c(2, 4, 6)],NA,studyB_results[5, c(2, 4, 6)])
errs <- ses*1.96

pdf("StudyB.pdf", height=7, width= 10)
gray_levels= c(.4, .6, .8, 0)
comp <- barplot(means, ylim=c(0,1), axisnames=F, col=gray(gray_levels), ylab="Prevalence")
arrows(comp,means-errs ,comp,means+errs, code=3, angle=90, length=.1)
legend("topright", bty= "n", ncol = 1, c("Direct Estimate", "Standard List Estimate", "Combined Estimate"), cex=1, fill=gray(gray_levels))
axis(1, at= comp[c(2,6,10,14, 18)], labels= c("Nuclear Power","Public Transportation", "Spanish-speaking", "Muslim Teachers", "CNN"), tick=F)
dev.off()



